Designing and evaluating an online reinforcement learning agent for physical exercise recommendations in N-of-1 trials¶

This notebook is used to evaluate the performance for the RL agent using data from the simulate_data.ipynb notebook.

Imports¶

In [1]:
%load_ext autoreload
%autoreload 2
In [2]:
# Generic Imports
import pandas as pd
import pandas
import seaborn as sns
import matplotlib.pylab as plt
import numpy as np
import arviz as az
import arviz
import numpy
import hvplot
import holoviews
import random
import itertools
from tqdm.notebook import tqdm
from bokeh.io import show
from bokeh.layouts import column
import jsonpickle
In [3]:
# Simulation Library Imports
from nof1rl4endo import *

Load from disk¶

In [4]:
## Load jsonpickle from disk:
with open("../data/2023-09-20-series.json", "r") as file:
    calculated_series, config_to_simulation_data = jsonpickle.decode(file.read())

Analysis¶

Allocations¶

In [5]:
titles = [
    f"{entry['result'].simulations[0].model}, {entry['result'].simulations[0].policy}"
    for entry in calculated_series
]


def plot_allocations(calculated_series, titles, treatment_name):
    panels = [
        entry["result"].plot_allocations(treatment_name) for entry in calculated_series
    ]
    for title, panel in zip(titles, panels):
        panel.opts(title=title)
    return holoviews.Layout(panels).cols(1)


plot_allocations(calculated_series, titles, treatment_name="selection_index")
Out[5]:

Custom Metric Definitions and Processing¶

In [6]:
# Filter only the observations where Thompson Sampling was used
ab_randomization_str = "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n', 'SelectionPolicyThompsonSampling(PhysicalExerciseModel)'])"
ba_randomization_str = "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyThompsonSampling(PhysicalExerciseModel)', 'SelectionPolicyFixedPolicy\\n'])"


CUT_TO_ADAPTIVE_INTERVENTIONS = True
if CUT_TO_ADAPTIVE_INTERVENTIONS:
    ab_randomization_start = 21
    ba_randomization_start = 7
    for entry in calculated_series:
        start = None
        if entry["configuration"]["policy"] == ab_randomization_str:
            start = ab_randomization_start
        if entry["configuration"]["policy"] == ba_randomization_str:
            start = ba_randomization_start

        if start is not None:
            result = entry["result"]
            simulations = result.simulations
            for simulation in simulations:
                simulation.history = simulation.history[start : start + 14]

series = [entry["result"] for entry in calculated_series]
In [7]:
def switch_to_fixed_policy(config):
    config = config.copy()
    config[
        "policy"
    ] = "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n'])"
    return config


metrics = [
    Entropy(outcome_name="type"),
    StandardDeviation(treatment_name="intensity"),
    StandardDeviation(treatment_name="duration"),
    RegretAgainstOtherConfiguration(
        outcome_name="pain_reduction",
        name="FixedPolicy",
        configuration_transform_function=switch_to_fixed_policy,
        config_to_simulation_data=config_to_simulation_data,
    ),
    ProbabilityIntervalMin(
        policy_filter_names=["ab_randomization_str", "ba_randomization_str"]
    ),
    ProbabilityIntervalMax(
        policy_filter_names=["ab_randomization_str", "ba_randomization_str"]
    ),
]

Plot sum of rewards (debug output)¶

In [8]:
SeriesOfSimulationsData.plot_lines(
    series, [SimpleRegret(outcome_name="pain_reduction")], legend_position=(0, 1.75)
)
Out[8]:
<Axes: xlabel='t', ylabel='Regret'>
No description has been provided for this image

Paper output generation¶

Table¶

In [9]:
name_mapping = {
    "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n'])": "Fixed",
    "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n', 'SelectionPolicyThompsonSampling(PhysicalExerciseModel)'])": "AB Randomization",
    "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyThompsonSampling(PhysicalExerciseModel)', 'SelectionPolicyFixedPolicy\\n'])": "BA Randomization",
}

df = SeriesOfSimulationsData.score_data(
    series, metrics, {"policy": lambda x: name_mapping[x]}
)
filtered_df = df.loc[
    (
        (df["policy"] == "AB Randomization") & (df["t"] == 34)
        | (df["policy"] == "BA Randomization") & (df["t"] == 20)
        | (df["policy"] == "Fixed") & (df["t"] == 34)
    )
]
filtered_df
Out[9]:
t score simulation patient_id model policy metric
34 34 1.290845 SimulationData 0 Scenario I Fixed Entropy (type)
34 34 1.290845 SimulationData 1 Scenario I Fixed Entropy (type)
34 34 1.290845 SimulationData 2 Scenario I Fixed Entropy (type)
34 34 1.290845 SimulationData 3 Scenario I Fixed Entropy (type)
34 34 1.290845 SimulationData 4 Scenario I Fixed Entropy (type)
... ... ... ... ... ... ... ...
13 20 0.000000 SimulationData 5 Scenario IV BA Randomization ProbabilityIntervalMax
13 20 0.000000 SimulationData 6 Scenario IV BA Randomization ProbabilityIntervalMax
13 20 0.000000 SimulationData 7 Scenario IV BA Randomization ProbabilityIntervalMax
13 20 0.000000 SimulationData 8 Scenario IV BA Randomization ProbabilityIntervalMax
13 20 0.000000 SimulationData 9 Scenario IV BA Randomization ProbabilityIntervalMax

720 rows × 7 columns

In [10]:
filtered_score_groupby = filtered_df.groupby(["model", "policy", "metric"])
means_table = filtered_score_groupby["score"].mean().unstack().unstack()
quantile_table = filtered_score_groupby["score"].quantile(0.75).unstack().unstack()

randomizations = ["AB Randomization", "BA Randomization"]
In [11]:
groupby_columns = ["model", "policy"]
pivoted_df = filtered_df.pivot(
    index=["model", "policy", "simulation", "patient_id"],
    columns="metric",
    values="score",
)
table = pivoted_df.groupby(groupby_columns).mean()

table["Regret 0.75 quantile"] = pivoted_df.groupby(groupby_columns).quantile(0.75)[
    "RegretAgainstOtherConfiguration(FixedPolicy)"
]
table = table.loc[(slice(None), randomizations), :]
In [12]:
table
Out[12]:
metric Entropy (type) ProbabilityIntervalMax ProbabilityIntervalMin RegretAgainstOtherConfiguration(FixedPolicy) std(duration) std(intensity) Regret 0.75 quantile
model policy
Scenario I AB Randomization 1.140387 0.0 0.0 0.000000 0.249303 0.298788 0.000000
BA Randomization 1.066846 0.0 0.0 0.000000 0.268321 0.311346 0.000000
Scenario II AB Randomization 0.682043 0.0 0.0 -14.253217 0.203038 0.220925 -7.824710
BA Randomization 0.678126 0.0 0.0 -12.621704 0.206248 0.225740 -4.707913
Scenario III AB Randomization 0.993269 0.0 0.0 -5.562863 0.243266 0.275533 -1.990651
BA Randomization 0.855366 0.0 0.0 -6.861749 0.253340 0.280710 -3.159108
Scenario IV AB Randomization 0.999499 0.0 0.0 -7.686238 0.238633 0.278250 -3.537417
BA Randomization 0.949294 0.0 0.0 -6.189339 0.249756 0.277468 -1.508569
In [13]:
print(
    table[
        [
            "RegretAgainstOtherConfiguration(FixedPolicy)",
            "Regret 0.75 quantile",
            "Entropy (type)",
            "std(duration)",
            "std(intensity)",
            "ProbabilityIntervalMax",
            "ProbabilityIntervalMin",
        ]
    ]
    .style.format(precision=2)
    .to_latex()
)
\begin{tabular}{llrrrrrrr}
 & metric & RegretAgainstOtherConfiguration(FixedPolicy) & Regret 0.75 quantile & Entropy (type) & std(duration) & std(intensity) & ProbabilityIntervalMax & ProbabilityIntervalMin \\
model & policy &  &  &  &  &  &  &  \\
\multirow[c]{2}{*}{Scenario I} & AB Randomization & 0.00 & 0.00 & 1.14 & 0.25 & 0.30 & 0.00 & 0.00 \\
 & BA Randomization & 0.00 & 0.00 & 1.07 & 0.27 & 0.31 & 0.00 & 0.00 \\
\multirow[c]{2}{*}{Scenario II} & AB Randomization & -14.25 & -7.82 & 0.68 & 0.20 & 0.22 & 0.00 & 0.00 \\
 & BA Randomization & -12.62 & -4.71 & 0.68 & 0.21 & 0.23 & 0.00 & 0.00 \\
\multirow[c]{2}{*}{Scenario III} & AB Randomization & -5.56 & -1.99 & 0.99 & 0.24 & 0.28 & 0.00 & 0.00 \\
 & BA Randomization & -6.86 & -3.16 & 0.86 & 0.25 & 0.28 & 0.00 & 0.00 \\
\multirow[c]{2}{*}{Scenario IV} & AB Randomization & -7.69 & -3.54 & 1.00 & 0.24 & 0.28 & 0.00 & 0.00 \\
 & BA Randomization & -6.19 & -1.51 & 0.95 & 0.25 & 0.28 & 0.00 & 0.00 \\
\end{tabular}

Figures¶

In [ ]:
# AB Randomization Plot
series_to_plot = [
    entry["result"]
    for entry in calculated_series
    if str(entry["result"].simulations[0].policy)
    == "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyFixedPolicy\\n', 'SelectionPolicyThompsonSampling(PhysicalExerciseModel)'])"
]
metrics = [
    RegretAgainstOtherConfiguration(
        outcome_name="pain_reduction",
        name="FixedPolicy",
        configuration_transform_function=switch_to_fixed_policy,
        config_to_simulation_data=config_to_simulation_data,
    ),
]
SeriesOfSimulationsData.plot_lines(
    series_to_plot, metrics, hue="model", legend_position=(0, 0.3)
)
plt.xlim(21, 34)
plt.ylim([-14, 1])


plt.savefig("regret_over_time_AB.pdf", bbox_inches="tight")
In [ ]:
# BA Randomization Plot
series_to_plot = [
    entry["result"]
    for entry in calculated_series
    if str(entry["result"].simulations[0].policy)
    == "ComposedPolicy(['SelectionPolicyFixedPolicy\\n', 'SelectionPolicyThompsonSampling(PhysicalExerciseModel)', 'SelectionPolicyFixedPolicy\\n'])"
]
metrics = [
    RegretAgainstOtherConfiguration(
        outcome_name="pain_reduction",
        name="FixedPolicy",
        configuration_transform_function=switch_to_fixed_policy,
        config_to_simulation_data=config_to_simulation_data,
    ),
]
SeriesOfSimulationsData.plot_lines(
    series_to_plot, metrics, hue="model", legend_position=(0, 0.3)
)

plt.xlim(7, 20)
plt.ylim([-14, 1])
plt.savefig("regret_over_time_BA.pdf", bbox_inches="tight")

Calculate Entropy and Standard Deviation for the fixed treatment sequence¶

In [ ]:
df.loc[(df["policy"] == "Fixed") & (df["t"] == 13)].groupby("metric").mean(
    numeric_only=True
)
In [ ]: